Table of Contents¶

  1. Portfolio_marginal_attributes
  2. Portfolio_attributes

In [1]:
import os
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go


from plotly.subplots import make_subplots
from scipy.stats import trim_mean
from scipy.stats import kurtosis, skew
In [2]:
# MANUAL INPUTS

#List portfolio securities here:
portfolio_with_candidate = ["SPY_history.csv", "XLU_history.csv", "XLF_history.csv"]
portfolio_without_candidate= ["XLF_history.csv","SPY_history.csv"]
candidate = ["XLU_history.csv"]

# Define weights_with_candidate (make sure the keys match the column names):
weights_with_candidate = {
    'Return_SPY': 0.3,  # 50% weight to SPY
    'Return_XLU': 0.5,  # 30% weight to XLU
    'Return_XLF': 0.2   # 20% weight to XLF
}

weights_without_candidate = {
    "Return_SPY": 0.6,  # Adjust weights to exclude candidate
    "Return_XLF": 0.4
}

observations_to_keep= 1500
In [3]:
def process_portfolio_data(securities, weights_with_candidate, weights_without_candidate):
    # Define the data folder path
    data_folder = os.path.join(os.path.expanduser("~/Desktop/Trading"), "Data")
    
    # Initialize an empty list to store the DataFrames
    data_frames = []
    
    # Iterate over the list of security files
    for csv in securities:
        # Construct the full file path
        file_path = os.path.join(data_folder, csv)
        
        # Read CSV data into a DataFrame
        data = pd.read_csv(file_path)
        
        # Convert 'Date' column to datetime format
        data['Date'] = pd.to_datetime(data['Date'])
        
        # Extract security name from the file name (e.g., "SPY" from "SPY_history.csv")
        security_name = csv.split('_')[0]
        
        # Add a suffix to column names so each security’s columns remain distinct
        data = data.add_suffix(f'_{security_name}')
        
        # Calculate day-over-day price change for the 'Close' column as the return
        data[f'Return_{security_name}'] = data[f'Close/Last_{security_name}'].pct_change()
        
        # Append DataFrame to the list
        data_frames.append(data)
    
    # Combine all DataFrames column-wise
    combined_data = pd.concat(data_frames, axis=1)
    
    # Calculate the portfolio’s uniform daily return: the average of all individual returns
    return_columns = [col for col in combined_data.columns if col.startswith("Return_")]
    combined_data["portfolio_uniform_daily_return"] = combined_data[return_columns].mean(axis=1)
    
    # Normalize weights for the portfolio with candidate
    total_weight_with = sum(weights_with_candidate.values())
    normalized_weights_with = {col: weight / total_weight_with for col, weight in weights_with_candidate.items()}
    
    # Calculate the portfolio’s weighted daily return (with candidate)
    combined_data["portfolio_weighted_daily_return"] = sum(
        combined_data[col] * weight 
        for col, weight in normalized_weights_with.items() 
        if col in combined_data.columns
    )
    
    # Normalize weights for the portfolio without candidate
    total_weight_without = sum(weights_without_candidate.values())
    normalized_weights_without = {col: weight / total_weight_without for col, weight in weights_without_candidate.items()}
    
    # Calculate the portfolio’s weighted daily return without the candidate
    combined_data["portfolio_weighted_daily_return_without_candidate"] = sum(
        combined_data[col] * weight 
        for col, weight in normalized_weights_without.items() 
        if col in combined_data.columns
    )
    
    # Add an Original_Index column (optional)
    combined_data['Original_Index'] = combined_data.index
    
    return combined_data
In [4]:
# Process the data for different portfolios by passing both dictionaries
portfolio_with_candidate_df = process_portfolio_data(portfolio_with_candidate, weights_with_candidate, weights_without_candidate)
portfolio_without_candidate_df = process_portfolio_data(portfolio_without_candidate, weights_with_candidate, weights_without_candidate)
candidate_df = process_portfolio_data(candidate, weights_with_candidate, weights_without_candidate)
In [5]:
portfolio_with_candidate_df.columns
Out[5]:
Index(['Date_SPY', 'Close/Last_SPY', 'Volume_SPY', 'Open_SPY', 'High_SPY',
       'Low_SPY', 'Return_SPY', 'Date_XLU', 'Close/Last_XLU', 'Volume_XLU',
       'Open_XLU', 'High_XLU', 'Low_XLU', 'Return_XLU', 'Date_XLF',
       'Close/Last_XLF', 'Volume_XLF', 'Open_XLF', 'High_XLF', 'Low_XLF',
       'Return_XLF', 'portfolio_uniform_daily_return',
       'portfolio_weighted_daily_return',
       'portfolio_weighted_daily_return_without_candidate', 'Original_Index'],
      dtype='object')
In [6]:
portfolio_with_candidate_df.head(1)
Out[6]:
Date_SPY Close/Last_SPY Volume_SPY Open_SPY High_SPY Low_SPY Return_SPY Date_XLU Close/Last_XLU Volume_XLU ... Close/Last_XLF Volume_XLF Open_XLF High_XLF Low_XLF Return_XLF portfolio_uniform_daily_return portfolio_weighted_daily_return portfolio_weighted_daily_return_without_candidate Original_Index
0 2025-01-24 607.97 34604690 609.81 610.78 606.8 NaN 2025-01-24 79.49 8385099 ... 50.85 39088670 50.53 50.9 50.48 NaN NaN NaN NaN 0

1 rows × 25 columns

In [7]:
# Add function to sort DataFrame by Original_Index
def sort_by_original_index(data):
    return data.sort_values(by='Original_Index', ascending=False)

# Sort the DataFrames by Original_Index
portfolio_with_candidate_df = sort_by_original_index(portfolio_with_candidate_df)
portfolio_without_candidate_df = sort_by_original_index(portfolio_without_candidate_df)
candidate_df = sort_by_original_index(candidate_df)
In [8]:
# Function to drop observations after a specified number
def drop_excess_observations(dataframe, observations_to_keep):
    """
    Returns a new DataFrame with only the first 'observations_to_keep' rows.
    
    Args:
        dataframe (pd.DataFrame): Input DataFrame to truncate.
        observations_to_keep (int): Number of rows to retain.

    Returns:
        pd.DataFrame: Truncated DataFrame.
    """
    return dataframe.iloc[:observations_to_keep].reset_index(drop=True)



# Apply the function to each DataFrame
portfolio_with_candidate_df = drop_excess_observations(portfolio_with_candidate_df, observations_to_keep)
portfolio_without_candidate_df = drop_excess_observations(portfolio_without_candidate_df, observations_to_keep)
candidate_df = drop_excess_observations(candidate_df, observations_to_keep)
In [9]:
# Automatically detect all return columns for the current portfolio
# DON'T THINK I NEED THE SECOND HALF OF THIS LINE OF CODE " and col != "portfolio_uniform_daily_return""
asset_returns_columns = [col for col in portfolio_with_candidate_df.columns if col.startswith("Return_") and col != "portfolio_uniform_daily_return"]
In [10]:
def generate_efficient_frontier_dirichlet(returns_df, asset_columns, num_samples=100):
    """
    Generates EFF by randomly assigning weights which will always sum to 1. 

    Parameters:
        returns_df (pd.DataFrame): DataFrame with asset return columns.
        asset_columns (list): List of asset return column names.
        num_samples (int): Number of random portfolios to sample.

    Returns:
        pd.DataFrame: Each row is a portfolio with its weights, mean return, and volatility.
    """
    n_assets = len(asset_columns)
    results = []

    for _ in range(num_samples):
        # Generate a random weight vector that sums to 1
        weights = np.random.dirichlet(np.ones(n_assets))

        # Portfolio return time series
        port_return_series = sum(returns_df[col] * weight for col, weight in zip(asset_columns, weights))

        # Portfolio metrics
        mean_return = port_return_series.mean()
        volatility = port_return_series.std()

        # Store weights and metrics
        record = {f"w_{col.split('_')[1]}": round(weight, 3) for col, weight in zip(asset_columns, weights)}
        record.update({
            "mean_return": mean_return,
            "volatility": volatility
        })
        results.append(record)

    frontier_df = pd.DataFrame(results)
    return frontier_df

frontier_df = generate_efficient_frontier_dirichlet(portfolio_with_candidate_df, asset_returns_columns, num_samples=1000)
print(frontier_df.head(2))
   w_SPY  w_XLU  w_XLF  mean_return  volatility
0  0.718  0.053  0.228    -0.000293    0.012115
1  0.306  0.431  0.262    -0.000187    0.011633
In [11]:
#CHANGE COLOR SCHEME TO BE KUROTOSIS OR SKEW... SHARPE RATIO IS POITNLESS HERE SINCE YOU'RE PLOTTING THE EFF

weight_columns = [col for col in frontier_df.columns if col.startswith("w_")]

# Create an interactive scatter plot with Plotly Express
fig = px.scatter(
    frontier_df, 
    x="volatility", 
    y="mean_return",
    color=frontier_df["mean_return"] / frontier_df["volatility"],  # Optional: use a color scale to represent Sharpe ratio
    color_continuous_scale="Viridis",
    hover_data=weight_columns + ["mean_return", "volatility"],  # Display these metrics on hover
    title="Efficient Frontier: Mean Return vs. Volatility"
)

# Update the layout for axis labels
fig.update_layout(
    xaxis_title="Portfolio Volatility (Standard Deviation)",
    yaxis_title="Portfolio Mean Return"
)

# Display the interactive plot
fig.show()
In [12]:
# Example: Adding skewness and kurtosis to frontier_df
frontier_df['skewness'] = frontier_df.apply(lambda row: skew(
    sum(portfolio_with_candidate_df[col] * row[f"w_{col.split('_')[1]}"] 
        for col in asset_returns_columns)
), axis=1)

frontier_df['kurtosis'] = frontier_df.apply(lambda row: kurtosis(
    sum(portfolio_with_candidate_df[col] * row[f"w_{col.split('_')[1]}"] 
        for col in asset_returns_columns)
), axis=1)

# Create a 3D scatter plot with skewness and kurtosis
fig = px.scatter_3d(
    frontier_df,
    x="volatility",
    y="mean_return",
    z="skewness",  # Skewness on the Z-axis
    color="kurtosis",  # Kurtosis as the color scale
    color_continuous_scale="Viridis",
    hover_data=weight_columns + ["mean_return", "volatility", "skewness", "kurtosis"],
    title="3D Efficient Frontier: Volatility, Mean Return, Skewness, and Kurtosis"
)


# Update the layout to make the chart bigger by adjusting width and height
fig.update_layout(
    width=1000,  # Set the width of the chart (e.g., 1000 pixels)
    height=800,  # Set the height of the chart (e.g., 800 pixels)
    scene=dict(
        xaxis_title="Portfolio Volatility (Standard Deviation)",
        yaxis_title="Portfolio Mean Return",
        zaxis_title="Portfolio Skewness"
    ),
    coloraxis_colorbar=dict(title="Kurtosis")  # Label the colorbar
)

# Display the interactive 3D plot
fig.show()

Portfolio_marginal_attributes ¶

Back to Table of Contents

In [13]:
#USE THIS FUNCTION/DATAFRAMES LATER WHEN YOU WANT TO ESTABLISH THE STATIONARITY OF THE KURTOSIS, SKEW, MEAN AND STANDARD DEVIATION

def calculate_cumulative_statistics(dataframe, portfolio_list, weights_dictionary):
    """
    Calculates weighted statistics (mean, standard deviation, skewness, kurtosis)
    cumulatively, row by row up to the current row.

    Returns:
        pd.DataFrame: A new DataFrame with cumulative statistics for each row.
    """
    # Validate input types
    if not isinstance(portfolio_list, list):
        raise ValueError("Expected portfolio_list to be a list of filenames.")
    
    # Extract security names dynamically from the portfolio list
    securities = [csv.split('_')[0] for csv in portfolio_list]
    return_keys = [f"Return_{security}" for security in securities]

    # Rescale the weights to exclude cash (normalize weights to sum to 1)
    invested_weights = {key: weights_dictionary.get(key, 0) for key in return_keys}
    total_invested_weight = sum(invested_weights.values())
    
    if total_invested_weight > 0:
        normalized_weights = {key: weight / total_invested_weight for key, weight in invested_weights.items()}
    else:
        print("Warning: Total invested weight is zero. Cannot rescale weights.")
        return None

    # Initialize columns for cumulative statistics
    stats_df = pd.DataFrame(index=dataframe.index)
    stats_df["cumulative_weighted_mean"] = 0
    stats_df["cumulative_weighted_std_dev"] = 0
    stats_df["cumulative_weighted_skewness"] = 0
    stats_df["cumulative_weighted_kurtosis"] = 0

    # Iterate over rows cumulatively
    for i in range(len(dataframe)):
        weighted_mean = 0
        weighted_std_dev = 0
        weighted_skewness = 0
        weighted_kurtosis = 0
        
        for return_key in return_keys:
            if return_key in dataframe.columns:
                # Include all rows up to the current row
                cumulative_data = dataframe[return_key].iloc[:i + 1].dropna()
                
                if len(cumulative_data) > 1:  # Ensure enough data for meaningful statistics
                    mean = cumulative_data.mean()
                    std_dev = cumulative_data.std()
                    skewness = skew(cumulative_data)
                    kurt = kurtosis(cumulative_data)
                    
                    weight = normalized_weights.get(return_key, 0)
                    
                    # Apply weights to the metrics
                    weighted_mean += mean * weight
                    weighted_std_dev += std_dev * weight
                    weighted_skewness += skewness * weight
                    weighted_kurtosis += kurt * weight
        
        # Assign cumulative statistics to the row
        stats_df.loc[i, "cumulative_weighted_mean"] = weighted_mean
        stats_df.loc[i, "cumulative_weighted_std_dev"] = weighted_std_dev
        stats_df.loc[i, "cumulative_weighted_skewness"] = weighted_skewness
        stats_df.loc[i, "cumulative_weighted_kurtosis"] = weighted_kurtosis

    return stats_df
In [30]:
print("Portfolio with Candidate DataFrame")
print(calculate_cumulative_statistics(portfolio_with_candidate_df, portfolio_with_candidate, weights_with_candidate).iloc[-1])
print()  # Blank line

print("Portfolio without Candidate DataFram")
print(calculate_cumulative_statistics(portfolio_without_candidate_df, portfolio_with_candidate, weights_with_candidate).iloc[-1])
print()  # Blank line

print("Candidate DataFrame")
print(calculate_cumulative_statistics(candidate_df, portfolio_with_candidate, weights_with_candidate).iloc[-1])
Portfolio with Candidate DataFrame
cumulative_weighted_mean        -0.000178
cumulative_weighted_std_dev      0.013109
cumulative_weighted_skewness     1.016090
cumulative_weighted_kurtosis    19.852011
Name: 1499, dtype: float64

Portfolio without Candidate DataFram
cumulative_weighted_mean       -0.000140
cumulative_weighted_std_dev     0.006623
cumulative_weighted_skewness    0.648067
cumulative_weighted_kurtosis    9.678355
Name: 1499, dtype: float64

Candidate DataFrame
cumulative_weighted_mean        -0.000038
cumulative_weighted_std_dev      0.006486
cumulative_weighted_skewness     0.368022
cumulative_weighted_kurtosis    10.173656
Name: 1499, dtype: float64
In [15]:
def plot_weighted_return_distributions(returns_with_candidate, returns_without_candidate, stats):
    """
    Plots the PDF (kernel density estimate) with histograms underneath for two distributions: 
    "With Candidate" and "Without Candidate".

    Args:
        returns_with_candidate (pd.Series): Weighted returns for "With Candidate" portfolio.
        returns_without_candidate (pd.Series): Weighted returns for "Without Candidate" portfolio.
        stats (dict): Dictionary containing mean, std dev, kurtosis, and skew for each portfolio.
    """
    # Create a dictionary for the data
    returns_data = {
        "With Candidate": returns_with_candidate.dropna(),
        "Without Candidate": returns_without_candidate.dropna()
    }
    
    # Initialize a figure
    plt.figure(figsize=(12, 6))
    colors = ["blue", "green"]
    
    # Overlay histograms and density plots
    for (label, data), color in zip(returns_data.items(), colors):
        sns.histplot(data, bins=50, kde=True, color=color, label=f"{label}\n"
            f"Mean: {stats[label]['Mean']:.4f}\n"
            f"Std Dev: {stats[label]['Std Dev']:.4f}\n"
            f"Kurtosis: {stats[label]['Kurtosis']:.4f}\n"
            f"Skew: {stats[label]['Skew']:.4f}",
            alpha=0.4)  # Adjust opacity for histograms
    
    # Customize plot
    plt.legend(fontsize=10)
    plt.title("Portfolio Weighted Return Distributions")
    plt.xlabel("Daily Weighted Return")
    plt.ylabel("Frequency")
    plt.grid(True)
    plt.show()
In [16]:
weighted_returns_data = {
    "With Candidate": portfolio_with_candidate_df["portfolio_weighted_daily_return"],
    "Without Candidate": portfolio_without_candidate_df["portfolio_weighted_daily_return_without_candidate"]
}

stats = {
    label: {
        "Mean": np.mean(data.dropna()),
        "Std Dev": np.std(data.dropna()),
        "Kurtosis": kurtosis(data.dropna()),
        "Skew": skew(data.dropna())
    }
    for label, data in weighted_returns_data.items()
}

# Call the adjusted function
plot_weighted_return_distributions(
    returns_with_candidate=portfolio_with_candidate_df["portfolio_weighted_daily_return"],
    returns_without_candidate=portfolio_without_candidate_df["portfolio_weighted_daily_return_without_candidate"],
    stats=stats
)
In [17]:
def trimmed_std_dev(data, trim_percent=0.02):
    """
    Calculate the trimmed standard deviation for a portfolio's average daily return.

    Parameters:
    - data (pd.DataFrame): DataFrame containing daily returns of all securities
    - trim_percent (float): Percentage of observations to trim from each end (default 10%)

    Returns:
    - float: Trimmed standard deviation of the portfolio
    """
    # Select only return columns (avoid including other numerical data)
    return_columns = [col for col in data.columns if col.startswith("Return_")]
    if not return_columns:
        raise ValueError("No return columns found in the dataset!")

    # Compute the portfolio's daily return (average of all securities' daily returns)
    data["portfolio_uniform_daily_return"] = data[return_columns].mean(axis=1)

    # Extract the portfolio daily returns as a series
    portfolio_returns = data["portfolio_uniform_daily_return"].dropna().values

    # Trim the extreme observations
    trim_count = int(len(portfolio_returns) * trim_percent)
    sorted_returns = np.sort(portfolio_returns)
    trimmed_returns = sorted_returns[trim_count:-trim_count]  # Trim bottom & top values

    # Compute and return standard deviation of the trimmed dataset
    return np.std(trimmed_returns, ddof=1)
In [25]:
# Compute trimmed standard deviations
trimmed_std_portfolio_without_candidate = trimmed_std_dev(portfolio_without_candidate_df, trim_percent=0.02)
trimmed_std_portfolio_with_candidate = trimmed_std_dev(portfolio_with_candidate_df, trim_percent=0.02)
trimmed_std_candidate = trimmed_std_dev(candidate_df, trim_percent=0.02)
In [19]:
def gini_mean_difference(data, column):
    """
    Calculate the Gini mean difference for a specified column in a DataFrame.
    
    Args:
        data (pd.DataFrame): The DataFrame containing the data.
        column (str): The column name for which to calculate the Gini mean difference.
        
    Returns:
        float: The Gini mean difference.
    """
    # Extract the specified column values
    values = data[column].dropna().values
    
    # Calculate the absolute differences between all pairs of elements
    diff_matrix = np.abs(np.subtract.outer(values, values))
    
    # Calculate the mean of the absolute differences
    gini_mean_diff = np.mean(diff_matrix)
    
    return gini_mean_diff
In [24]:
gini_portfolio_with_candidate = gini_mean_difference(portfolio_with_candidate_df, 'portfolio_uniform_daily_return')
gini_portfolio_without_candidate = gini_mean_difference(portfolio_without_candidate_df, 'portfolio_uniform_daily_return')
gini_candidate = gini_mean_difference(candidate_df, 'portfolio_uniform_daily_return')

DOUBLE CHECK THE STATIONARITY OF GINI MEAN COEFFICENT AND TRIMMED STANDARD DEVIATION

In [21]:
def plot_clustered_bar_chart_with_labels(gini_values, trimmed_std_values, labels):
    """
    Plots a clustered bar chart with value labels for Gini mean difference
    and trimmed standard deviation for three portfolios.

    Args:
        gini_values (list): A list of Gini mean differences for the portfolios.
        trimmed_std_values (list): A list of trimmed standard deviations for the portfolios.
        labels (list): A list of labels for the portfolios.
    """
    # Number of portfolios
    n_portfolios = len(labels)

    # Bar positions
    x = np.arange(n_portfolios)  # X-axis positions for the groups
    bar_width = 0.35  # Width of each bar

    # Plot bars
    plt.figure(figsize=(10, 6))
    gini_bars = plt.bar(x - bar_width / 2, gini_values, width=bar_width, label='Gini Mean Difference', color='skyblue')
    std_bars = plt.bar(x + bar_width / 2, trimmed_std_values, width=bar_width, label='Trimmed Std Dev', color='lightcoral')

    # Add labels and title
    plt.title('Comparison of Portfolio Statistics')
    plt.xlabel('Portfolios')
    plt.ylabel('Values')
    plt.xticks(x, labels)  # Set portfolio labels for x-axis ticks
    plt.legend()
    plt.grid(axis="y", linestyle="--", alpha=0.5)

    # Add value labels to the bars
    for bar in gini_bars:
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width() / 2, height, f'{height:.4f}', ha='center', va='bottom', fontsize=10)

    for bar in std_bars:
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width() / 2, height, f'{height:.4f}', ha='center', va='bottom', fontsize=10)

    # Display the plot
    plt.tight_layout()
    plt.show()

# Define the calculated values
gini_values = [gini_portfolio_with_candidate, gini_portfolio_without_candidate, gini_candidate]
trimmed_std_values = [trimmed_std_portfolio_with_candidate, trimmed_std_portfolio_without_candidate, trimmed_std_candidate]
labels = ["With Candidate", "Without Candidate", "Candidate Only"]

# Call the function to plot the bar chart
plot_clustered_bar_chart_with_labels(gini_values, trimmed_std_values, labels)

Portfolio_attributes ¶

Back to Table of Contents

Shape of the Distribution (Skewness, Kurtosis, Standard Deviation, Mean): Since you're working with returns, which are essentially first-differenced prices, you're correct that they've already been detrended to some extent. This makes the assumption of mean-variance stationarity less critical.

Even though financial returns can still exhibit non-stationary behavior (e.g., volatility clustering), their distributional properties (like skewness and kurtosis) are relatively stable over time if calculated over a large enough sample.

In this case, you don't necessarily need to account for stationarity explicitly unless your analysis spans vastly different market conditions (like a bull market vs. a bear market).

You're absolutely right that for metrics like beta and covariance, which depend on relationships between securities, stationarity is more crucial. If the underlying data isn't stationary, these metrics could fluctuate unpredictably over time, making them unreliable.

You're absolutely correct that if your focus is on the stability of variance, covariance, and beta, the concept of mean stationarity isn't particularly relevant.Don't use dickey-fuller or ADF

In [22]:
# Select only the "Close/Last_" columns
close_columns = [col for col in portfolio_with_candidate_df.columns if col.startswith('Close/Last_')]

# Calculate daily percentage returns
returns = portfolio_with_candidate_df[close_columns].pct_change()

# Drop NaN values (from the first row caused by pct_change)
returns = returns.dropna()

# 1. Correlation Matrix
correlation_matrix = returns.corr()

# 2. Covariance Matrix
covariance_matrix = returns.cov()

# 3. Beta Matrix - Adjust to have the same shape as correlation and covariance matrices
benchmark = close_columns[0]
betas = {}

# Initialize beta_matrix with NaN values to match the size of the correlation and covariance matrices
beta_matrix = pd.DataFrame(np.nan, index=close_columns, columns=close_columns)

for col in close_columns:
    for row in close_columns:
        if col == row:
            beta_matrix.loc[row, col] = 1
        else:
            beta = covariance_matrix.loc[col, benchmark] / covariance_matrix.loc[benchmark, benchmark] 
            beta_matrix.loc[col, benchmark] = beta
In [27]:
# Assuming 'correlation_matrix' and 'covariance_matrix_mu' are already defined,
# and you have already computed these matrices.

# Create a dictionary with only the matrices you want to plot together
matrices = {
    "Correlation Matrix": correlation_matrix,
    "Covariance Matrix (mu)": covariance_matrix_mu
}

num_matrices = len(matrices)

# Create subplots dynamically (without Beta Matrix)
fig = make_subplots(
    rows=1, cols=num_matrices,  
    subplot_titles=list(matrices.keys()),
    column_widths=[1/num_matrices] * num_matrices,  
    shared_yaxes=True,
    shared_xaxes=True
)

# Adjust x positions for colorbars below each heatmap
x_positions = np.linspace(0.15, 0.85, num_matrices)  # evenly spread

# Add each matrix as a heatmap dynamically
for i, (title, matrix) in enumerate(matrices.items(), start=1):
    fig.add_trace(
        go.Heatmap(
            z=matrix.values,
            x=matrix.columns,
            y=matrix.columns,
            colorscale="RdBu",
            colorbar=dict(
                title=title.split()[0],  # e.g., "Correlation" or "Covariance"
                tickvals=[matrix.values.min(), 0, matrix.values.max()],
                yanchor="top",
                y=-0.25,
                x=x_positions[i - 1],
                xanchor="center",
                orientation="v"
            ),
            text=matrix.values.round(2),
            texttemplate="%{text}",
            showscale=True,
            hoverinfo="skip"
        ),
        row=1, col=i
    )

# Update overall layout
fig.update_layout(
    title="Correlation and Covariance (mu) Matrices",
    height=750,
    showlegend=False,
    title_x=0.5
)

fig.show()
In [28]:
# Define your benchmark (assumed to be the first asset)
benchmark = close_columns[0]  # e.g., 'Close/Last_SPY'

# Extract the single column from the beta matrix as a DataFrame
beta_column = beta_matrix[[benchmark]]  # double brackets keep it as a DataFrame

# Create a heatmap for just that column
fig_beta = go.Figure(
    go.Heatmap(
        z=beta_column.values,
        x=beta_column.columns,
        y=beta_column.index,
        colorscale="RdBu",
        colorbar=dict(
            title=f"Beta vs {benchmark}",
            tickvals=[beta_column.values.min(), 0, beta_column.values.max()]
        ),
        text=beta_column.values.round(2),
        texttemplate="%{text}",
        hoverinfo="skip"
    )
)

# Update layout for the beta column heatmap
fig_beta.update_layout(
    title=f"Beta for Each Asset Relative to {benchmark}",
    xaxis_title=f"Benchmark: {benchmark}",
    yaxis_title="Assets",
    width=400,   # Adjust width as needed
    height=600   # Adjust height as needed
)

fig_beta.show()